
library(DESeq2)
library(org.Hs.eg.db)
library(pheatmap)
library(stringr)
library(plyr)
library(ggplot2)


# ---- Session setup ----

# Switch the session to the local scratch directory.
setwd("D:/software/R/Rtemp")

# Project root; the input and output paths used further down are built from it.
workpath <- "C:/Users/ab998/OneDrive - University of Exeter/TDP43 project JG/AIO/project11219"

# ---- Gene annotation lookup ----

# Entrez IDs that have a symbol mapping in org.Hs.eg.db.
x <- org.Hs.egSYMBOL
geneid <- mappedkeys(x)

cols <- c("SYMBOL", "ENSEMBL") ## columns you want

# One row per Entrez ID / symbol / Ensembl ID combination.
geneid.df <- select(
  org.Hs.eg.db,
  keys = geneid,
  columns = cols,
  keytype = "ENTREZID"
)


# Chromosome names considered valid: chr1 ... chr22 plus chrX.
validchrname <- paste0("chr", c(1:22, "X"))



# ---- Load the sample sheet and assemble the gene x sample count matrix ----

# Sample sheet (tab-separated, with header). This section uses its `libname`
# column (prefix of each count file) and `sampleid` column (matrix column and
# sample-sheet row labels).
sample.df <- read.table(paste0(workpath, "/counts/aiotdp43_samples.txt"),
                        header = TRUE, sep = "\t")
stopifnot(
  nrow(sample.df) >= 1,
  all(c("libname", "sampleid") %in% colnames(sample.df))
)

# Read one per-library count table, skipping its first four lines.
# NOTE(review): the ".ReadsPerGene.out.tab" suffix looks like STAR output, where
# the first four lines are summary counters and column 2 holds the unstranded
# counts -- confirm that V2 is the right column for this library prep.
read.counts.tab <- function(libname) {
  read.table(paste0(workpath, "/counts/", libname, ".ReadsPerGene.out.tab"),
             header = FALSE, sep = "\t", skip = 4)
}

# The first library defines the gene list and its order.
tmpdf <- read.counts.tab(sample.df$libname[1])
rawids <- as.character(tmpdf$V1)

# Keep only the part of each gene ID before the first "." (drops the version
# suffix of IDs such as "ENSG00000000003.15").
genenames <- sub("[.].*$", "", rawids)

count.mat <- matrix(0, nrow = nrow(tmpdf), ncol = nrow(sample.df))
count.mat[, 1] <- tmpdf$V2

# seq_len(...)[-1] is empty when there is a single sample, whereas 2:1 would
# iterate over c(2, 1).
for (sctr in seq_len(nrow(sample.df))[-1]) {
  tmpdf <- read.counts.tab(sample.df$libname[sctr])

  # Columns are filled by position, so every file must list the same genes in
  # the same order as the first one.
  if (!identical(as.character(tmpdf$V1), rawids)) {
    stop("Gene IDs in the count file for library '", sample.df$libname[sctr],
         "' do not match those of the first library", call. = FALSE)
  }

  count.mat[, sctr] <- tmpdf$V2
}

rownames(count.mat) <- genenames
colnames(count.mat) <- sample.df$sampleid

rownames(sample.df) <- sample.df$sampleid

# ---- Harmonise condition labels and drop low-count genes ----

# Recode "nes0" -> "control" and "nesm" -> "nes". Coerce to character first so
# the assignment also works if read.table() returned a factor (assigning a
# value that is not an existing level would produce NA).
sample.df$condition <- as.character(sample.df$condition)
sample.df$condition[which(sample.df$condition == "nes0")] <- "control"
sample.df$condition[which(sample.df$condition == "nesm")] <- "nes"

# Keep genes with at least 10 reads summed over all samples.
# drop = FALSE keeps count.mat a matrix even if only one gene passes.
tmpvec <- rowSums(count.mat)
count.mat <- count.mat[which(tmpvec >= 10), , drop = FALSE]

# Explicit print so the retained dimensions are also shown under source().
print(dim(count.mat))

# ---- Build the DESeq2 dataset ----

# Column data is the sample sheet with `condition` stored as a factor.
coldata <- sample.df
coldata$condition <- factor(coldata$condition)

# Single-factor design on condition; size factors are estimated on the full
# sample set before any subsetting.
dds <- estimateSizeFactors(
  DESeqDataSetFromMatrix(
    countData = count.mat,
    colData = coldata,
    design = ~ condition
  )
)

# Keep the complete dataset; `dds` is re-used below for sample subsets.
dds.all <- dds

# ---- Sample-level QC: distance heatmap and PCA for each sample subset ----

library("RColorBrewer")

# Reversed "Blues" palette used for the sample-distance heatmaps.
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)

# Run the QC plots for one subset of samples.
#
# Draws a sample-to-sample distance heatmap and a PCA plot on the current
# device, then writes the styled PCA plot to a TIFF file.
#
# dds.sub     DESeqDataSet holding the samples to plot.
# tiff.file   Path of the TIFF file to write.
# heat.colors Colour vector for the heatmap.
#
# Returns a list with `pca.df` (the PCA coordinates plus `treatment` and
# `group` columns) and `plot` (the ggplot object).
make.qc.plots <- function(dds.sub, tiff.file, heat.colors) {
  vsd <- vst(dds.sub, blind = FALSE)

  # Euclidean distances between samples on the variance-stabilised scale.
  sampleDists <- dist(t(assay(vsd)))
  pheatmap(as.matrix(sampleDists),
           clustering_distance_rows = sampleDists,
           clustering_distance_cols = sampleDists,
           color = heat.colors)

  # Quick default PCA; print() is needed because ggplot objects are not
  # auto-printed inside a function or under source().
  print(plotPCA(vsd, intgroup = "condition"))

  # plotPCA(returnData = TRUE) only returns the columns named in `intgroup`,
  # so `genotype` has to be requested explicitly or it is missing from pca.df.
  # NOTE(review): assumes the sample sheet may carry a `genotype` column;
  # if it does not, the legend falls back to the treatment label alone.
  intgroup <- intersect(c("genotype", "condition"), colnames(colData(vsd)))
  pca.df <- plotPCA(vsd, intgroup = intgroup, returnData = TRUE)

  # Variance explained by PC1/PC2, taken from the data rather than hard-coded.
  percentVar <- round(100 * attr(pca.df, "percentVar"))

  pca.df$treatment <- ifelse(pca.df$condition %in% "nes", "NES", "Control")
  pca.df$group <- if ("genotype" %in% colnames(pca.df)) {
    paste(pca.df$genotype, pca.df$treatment, sep = " ")
  } else {
    pca.df$treatment
  }

  p <- ggplot(pca.df) +
    aes(PC1, PC2, color = group) +
    geom_point(size = 4) +
    coord_fixed() +
    xlab(paste0("PC1: ", percentVar[1], "%")) +
    ylab(paste0("PC2: ", percentVar[2], "%")) +
    theme_bw() +
    theme(axis.text = element_text(size = 16, color = "black"),
          axis.title = element_text(size = 16),
          legend.text = element_text(size = 16),
          legend.position = "bottom",
          legend.title = element_blank(),
          panel.grid.minor = element_blank()) +
    guides(colour = guide_legend(override.aes = list(size = 4))) +
    theme(aspect.ratio = 1)

  print(p)

  # Write the figure; on.exit() closes the device even if drawing fails.
  tiff(tiff.file, res = 300, compression = "jpeg",
       width = 10, height = 10, units = "cm")
  on.exit(dev.off(), add = TRUE)
  print(p)

  list(pca.df = pca.df, plot = p)
}

# First subset: columns 1-6 of the full dataset.
# NOTE(review): presumably the D22 samples, going by the output file name --
# confirm the column order against the sample sheet.
dds <- dds.all[, 1:6]
qc <- make.qc.plots(dds,
                    paste0(workpath, "/figures/AIO-TDP43-model-D22-PCA.tiff"),
                    colors)
pca.df <- qc$pca.df
p <- qc$plot

####

# Second subset: columns 7-12 of the full dataset.
# NOTE(review): presumably the d40 samples, going by the output file name --
# confirm the column order against the sample sheet.
dds <- dds.all[, 7:12]
qc <- make.qc.plots(dds,
                    paste0(workpath, "/figures/AIO-TDP43-model-d40-PCA.tiff"),
                    colors)
pca.df <- qc$pca.df
p <- qc$plot

####


